Read data

read_tongue <- function(file) {
  tibble <- read_csv(file, col_names = c("X", "Y", "Z")) %>%
    mutate(file = substr(file, 26, 42))
  
  tibble <- mutate(tibble, index = seq(1, nrow(tibble)))
  
  return(tibble)
}

stimuli <- read_csv("italian-sz/data/stimuli.csv")

tongue <- list.files(
  path = "italian-sz/data/datasets",
  pattern = "*.csv",
  full.names = TRUE
) %>%
  map_df(~read_tongue(.)) %>%
  left_join(y = stimuli) %>%
  mutate(phone = ordered(phone, levels = c("s", "z"))) %>%
  mutate_if(is.character, as.factor) %>%
  create_event_start("file")

contrasts(tongue$phone) <- "contr.treatment"

3D tongue modelling with GAMs

Tongue GAM

tongue_gam <- bam(
  Z ~
    s(X, Y),
  data = tongue
)
vis.gam(tongue_gam, theta = -35, phi = 40, scale = FALSE, xlim = c(1, 13), ylim = c(3, 10), zlim = c(3, 8), color = "terrain")

Tongue GAM 2

tongue_gam_2 <- bam(
  Z ~
    s(X, Y, by = phone),
  data = tongue
)
vis.gam(tongue_gam_2, theta = -35, phi = 40, scale = FALSE, cond = list(phone = "s"), ticktype = "detailed", xlim = c(1, 13), ylim = c(3, 10), zlim = c(3, 8), main = "[s]", color = "terrain")
vis.gam(tongue_gam_2, theta = -35, phi = 40, scale = FALSE, cond = list(phone = "z"), ticktype = "detailed", xlim = c(1, 13), ylim = c(3, 10), zlim = c(3, 8), main = "[z]", color = "terrain")
vis.gam(tongue_gam_2, theta = -35, phi = 40, scale = FALSE, cond = list(phone = "s"), ticktype = "detailed", xlim = c(1, 13), ylim = c(3, 10), zlim = c(3, 8), main = "[s]", color = "terrain", se = 1.96)
vis.gam(tongue_gam_2, theta = -35, phi = 40, scale = FALSE, cond = list(phone = "z"), ticktype = "detailed", xlim = c(1, 13), ylim = c(3, 10), zlim = c(3, 8), main = "[z]", color = "terrain", se = 1.96)

Tongue GAM 3

tongue_gam_3 <- bam(
  Z ~
    phone +
    s(X, Y) +
    s(X, Y, by = phone),
  data = tongue
)
vis.gam(tongue_gam_3, view = c("X", "Y"), theta = -35, phi = 50, scale = FALSE, cond = list(phone = "s"), ticktype = "detailed", xlim = c(1, 13), ylim = c(3, 10), zlim = c(3, 8), main = "[s]", color = "terrain", too.far = .01)
vis.gam(tongue_gam_3, view = c("X", "Y"), theta = -35, phi = 50, scale = FALSE, cond = list(phone = "z"), ticktype = "detailed", xlim = c(1, 13), ylim = c(3, 10), zlim = c(3, 8), main = "[z]", color = "terrain", too.far = .01)
vis.gam(tongue_gam_3, view = c("X", "Y"), theta = -35, phi = 50, scale = FALSE, cond = list(phone = "s"), ticktype = "detailed", xlim = c(1, 13), ylim = c(3, 10), zlim = c(3, 8), main = "[s]", color = "terrain", se = 1.96, too.far = .01)
vis.gam(tongue_gam_3, view = c("X", "Y"), theta = -35, phi = 50, scale = FALSE, cond = list(phone = "z"), ticktype = "detailed", xlim = c(1, 13), ylim = c(3, 10), zlim = c(3, 8), main = "[z]", color = "terrain", se = 1.96, too.far = .01)
vis.gam(tongue_gam_3, view = c("X", "Y"), theta = -90, phi = 0, scale = FALSE, cond = list(phone = "s"), ticktype = "detailed", xlim = c(1, 13), ylim = c(3, 10), zlim = c(3, 8), main = "[s]", color = "terrain", too.far = .01)
vis.gam(tongue_gam_3, view = c("X", "Y"), theta = -90, phi = 0, scale = FALSE, cond = list(phone = "z"), ticktype = "detailed", xlim = c(1, 13), ylim = c(3, 10), zlim = c(3, 8), main = "[z]", color = "terrain", too.far = .01)
vis.gam(tongue_gam_3, view = c("X", "Y"), theta = 90, phi = 0, scale = FALSE, cond = list(phone = "s"), ticktype = "detailed", xlim = c(1, 13), ylim = c(3, 10), zlim = c(3, 8), main = "[s]", color = "terrain", too.far = .01)
vis.gam(tongue_gam_3, view = c("X", "Y"), theta = 90, phi = 0, scale = FALSE, cond = list(phone = "z"), ticktype = "detailed", xlim = c(1, 13), ylim = c(3, 10), zlim = c(3, 8), main = "[z]", color = "terrain", too.far = .01)

Tongue GAM with AR1

rho <- start_value_rho(tongue_gam_3)

gam_ar <- bam(
  Z ~
    phone +
    s(X, Y) +
    s(X, Y, by = phone),
  data = tongue,
  AR.start = tongue$start.event,
  rho = rho
)
acf_resid(gam_ar, split_pred = "AR.start")

vis.gam(gam_ar, view = c("X", "Y"), theta = -35, phi = 50, scale = FALSE, cond = list(phone = "s"), ticktype = "detailed", xlim = c(1, 13), ylim = c(3, 10), zlim = c(3, 8), main = "[s]", color = "terrain", too.far = .05)

vis.gam(gam_ar, view = c("X", "Y"), theta = -35, phi = 50, scale = FALSE, cond = list(phone = "z"), ticktype = "detailed", xlim = c(1, 13), ylim = c(3, 10), zlim = c(3, 8), main = "[z]", color = "terrain", too.far = .05)

vis.gam(gam_ar, view = c("X", "Y"), theta = 0, phi = 50, scale = FALSE, cond = list(phone = "s"), ticktype = "detailed", xlim = c(1, 13), ylim = c(3, 10), zlim = c(3, 8), main = "[s]", color = "terrain", too.far = .05)

vis.gam(gam_ar, view = c("X", "Y"), theta = 0, phi = 50, scale = FALSE, cond = list(phone = "z"), ticktype = "detailed", xlim = c(1, 13), ylim = c(3, 10), zlim = c(3, 8), main = "[z]", color = "terrain", too.far = .05)

Volume increase

x_min <- min(tongue$X)
x_max <- max(tongue$X)
y_min <- min(tongue$Y)
y_max <- max(tongue$Y)

# File is needed in the grid for midsagittal data (it groups by file)
grid <- expand.grid(X = seq(x_min, x_max, length = 100), Y = seq(y_min, y_max, length = 100), file = levels(tongue$file))
grid <- cbind(grid, phone = rep(rep(c("s", "z"), each = 10000), 5))

predicted <- predict(gam_ar, newdata = grid)
predicted_df <- cbind(grid, predicted) %>%
  dplyr::select(-file) %>%
  unique() %>%
  spread(phone, predicted) %>%
  mutate(
    difference = s - z
  )

sum_diff <- sum(predicted_df$difference)
volume_diff <- sum_diff * (x_max - x_min)/100 * (y_max - y_min)/100
cat("The volume increase for [z] over [s] is", round(volume_diff, 2), "cm^3 (= ml)")
## The volume increase for [z] over [s] is 16.82 cm^3 (= ml)
interp_data <- interp(tongue$X, tongue$Y, tongue$Z, xo = seq(x_min, x_max, length = 100), yo = seq(y_min, y_max, length = 100), duplicate = "mean")

interp_m <- interp_data$z
interp_m <- ifelse(!is.na(interp_m), 1, 0)

predicted_df_2 <- predicted_df
predicted_df_2$mask <- as.vector(t(interp_m))

predicted_df_2 <- predicted_df_2 %>%
  mutate(diff_2 = difference * mask)

sum_diff_2 <- sum(predicted_df_2$diff_2)
volume_diff_2 <- sum_diff_2 * (x_max - x_min)/100 * (y_max - y_min)/100
cat("The volume increase for [z] over [s] is", round(volume_diff_2, 2), "cm^3 (= ml)")
## The volume increase for [z] over [s] is 12.08 cm^3 (= ml)
predicted_se <- as_tibble(predict(gam_ar, newdata = grid, se = TRUE))

predicted_df_se <- cbind(grid, predicted_se) %>%
  dplyr::select(-file) %>%
  unique() %>%
  mutate(
    CI_up = fit + se.fit * 1.96,
    CI_lo = fit - se.fit * 1.96
  )
  
predicted_ci_up <- dplyr::select(predicted_df_se, X, Y, CI_up, phone) %>%
  spread(phone, CI_up) %>%
  mutate(
    CI_up_diff = s - z
  ) %>%
  dplyr::select(-s , -z)

predicted_ci_lo <- dplyr::select(predicted_df_se, X, Y, CI_lo, phone) %>%
  spread(phone, CI_lo) %>%
  mutate(
    CI_lo_diff = s - z
  ) %>%
  dplyr::select(-s , -z)

predicted_ci <- full_join(predicted_ci_up, predicted_ci_lo)
## Joining, by = c("X", "Y")
predicted_ci$mask <- as.vector(t(interp_m))

predicted_ci_2 <- predicted_ci %>%
  mutate(
    CI_up_diff_2 = CI_up_diff * mask,
    CI_lo_diff_2 = CI_lo_diff * mask
    )

sum_ci_up_diff_2 <- sum(predicted_ci_2$CI_up_diff_2)
sum_ci_lo_diff_2 <- sum(predicted_ci_2$CI_lo_diff_2)
volume_up_diff_2 <- sum_ci_up_diff_2 * (x_max - x_min)/100 * (y_max - y_min)/100
volume_lo_diff_2 <- sum_ci_lo_diff_2 * (x_max - x_min)/100 * (y_max - y_min)/100
cat("The volume increase for [z] over [s] is between", round(volume_up_diff_2, 2), "and", round(volume_lo_diff_2, 2), "cm^3 (= ml)\n")
## The volume increase for [z] over [s] is between 12 and 12.17 cm^3 (= ml)

Token variation

files_interp <- NULL

for (file in 1:length(levels(tongue$file))) {
  file_data <- tongue[tongue$file == levels(tongue$file)[file],]
  
  file_interp_data <- interp(file_data$X, file_data$Y, file_data$Z, xo = seq(x_min, x_max, length = 100), yo = seq(y_min, y_max, length = 100))
  
  files_interp <- c(files_interp, as.vector(t(file_interp_data$z)))
}

tongue_interp <- cbind(grid, interp = files_interp) %>%
  group_by(phone, X, Y) %>%
  summarise(min = min(interp, na.rm = TRUE), max = max(interp, na.rm = TRUE)) %>%
  mutate(
    min = ifelse(min == Inf, NA, min),
    max = ifelse(max == -Inf, NA, max)
  )

s_interp <- filter(tongue_interp, phone == "s")
s_min_m <- matrix(s_interp$min, 100, 100)

z_interp <- filter(tongue_interp, phone == "z")
z_max_m <- matrix(z_interp$max, 100, 100)

color <- rep(0, length(s_min_m))
dim(color) <- dim(s_min_m)

color2 <- rep(1, length(z_max_m))
dim(color2) <- dim(z_max_m)
plot_ly(colors = c("black", "orange")) %>%
  add_surface(
    z = ~s_min_m,
    surfacecolor = color,
    cauto = F,
    cmax = 1,
    cmin = 0,
    opacity = 0.8
  ) %>%
  add_surface(
    z = ~z_max_m,
    surfacecolor = color2,
    cauto = F,
    cmax = 1,
    cmin = 0,
    opacity = 0.8
  ) %>%
  layout(scene = list(aspectmode = "manual", aspectratio = list(x = 0.6, y = 1, z = 0.5)))
f_sum_diff <- sum(s_interp$min - z_interp$max, na.rm = TRUE)
f_volume_diff <- f_sum_diff * (x_max - x_min)/100 * (y_max - y_min)/100
cat("The volume increase for [z] over [s] is", round(f_volume_diff, 2), "cm^3 (= ml)")
## The volume increase for [z] over [s] is -2.68 cm^3 (= ml)

Midsagittal

midsagittal <- cbind(grid, interp = files_interp) %>%
  filter(X > 6.8 & X < 6.9) %>%
  na.omit() %>%
  group_by(file) %>%
  mutate(smoothed = WMA(interp)) # smoothed with a weighted moving

parasaggital_l <- cbind(grid, interp = files_interp) %>%
  filter(X > 3 & X < 3.1) %>%
  na.omit() %>%
  group_by(file) %>%
  mutate(smoothed = WMA(interp, n = 9)) # smoothed with a weighted moving

parasaggital_r <- cbind(grid, interp = files_interp) %>%
  filter(X > 10.2 & X < 10.3) %>%
  na.omit() %>%
  group_by(file) %>%
  mutate(smoothed = WMA(interp)) # smoothed with a weighted moving
ggplot(midsagittal, aes(Y, interp, colour = phone, group = file)) +
  geom_path()

ggplot(midsagittal, aes(Y, smoothed, colour = phone, group = file)) +
  geom_path()

ggplot(midsagittal, aes(Y, interp, colour = phone, linetype = phone)) +
  geom_smooth(method = "loess")

ggplot(parasaggital_l, aes(Y, interp, colour = phone, group = file)) +
  geom_path()

ggplot(parasaggital_l, aes(Y, interp, colour = phone)) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(parasaggital_r, aes(Y, interp, colour = phone, group = file)) +
  geom_path()